Analysis date: 2023-09-20
CRC_Xenografts_Batch2_DataProcessing Script Xenografts_Batch1_2_DataProcessing Script
load("../Data/Cache/Xenografts_Batch1_2_DataProcessing.RData")
all_pY_peptides_form <- rbind( (pY_Set1_form %>% select('Annotated_Sequence', 'HGNC_Symbol', 'Master.Protein.Accessions', 'Master.Protein.Descriptions') ),
(pY_Set2_form %>% select('Annotated_Sequence', 'HGNC_Symbol', 'Master.Protein.Accessions', 'Master.Protein.Descriptions') ) ) %>%
unique
pY_Batch1_form <- plyr::join_all(list(all_pY_peptides_form, pY_Set1_form,
pY_Set2_form),
by=c('Annotated_Sequence', 'HGNC_Symbol', 'Master.Protein.Accessions', 'Master.Protein.Descriptions' ),
type='left') %>% as_tibble()
pY_Batch1_form
## # A tibble: 650 × 40
## Annotated_Sequence HGNC_Symbol Master.Protein.Acces…¹ Master.Protein.Descr…²
## <chr> <chr> <chr> <chr>
## 1 ADGTNTGFPRyPNDSVYA… RET P07949 Proto-oncogene tyrosi…
## 2 ADHQPLTEASyVNLPTIA… RPSA P08865 40S ribosomal protein…
## 3 ADyDTLSLR PKP3 Q9Y446 Plakophilin-3 OS=Homo…
## 4 AEDGSVIDyELIDQDAR ANXA2 P07355 Annexin A2 OS=Homo sa…
## 5 AEERPTFDYLQSVLDDFy… LYN P07948 Tyrosine-protein kina…
## 6 AEGSDVANAVLDGADCIM… PKM P14618 Pyruvate kinase PKM O…
## 7 AESGPDLRyEVTSGGGGT… DSP P15924 Desmoplakin OS=Homo s…
## 8 AGSLPNyATINGK TNS1 Q9HBL0 Tensin-1 OS=Homo sapi…
## 9 ALELDSNLyR MYH9 P35579 Myosin-9 OS=Homo sapi…
## 10 ALNGAEPNyHSLPSAR LAMTOR1 Q6IAA8 Ragulator complex pro…
## # ℹ 640 more rows
## # ℹ abbreviated names: ¹​Master.Protein.Accessions, ²​Master.Protein.Descriptions
## # ℹ 36 more variables: log2FC_Xenograft_EC_24h_2_Set1 <dbl>,
## # log2FC_Xenograft_EC_24h_5_Set1 <dbl>, log2FC_Xenograft_E_24h_4_Set1 <dbl>,
## # log2FC_Xenograft_ctrl_5d_7_Set1 <dbl>,
## # log2FC_Xenograft_ctrl_5d_1_Set1 <dbl>,
## # log2FC_Xenograft_EC_24h_1_Set1 <dbl>, …
pY_mat_Batch1 <- pY_Batch1_form %>%
mutate( rname = paste0( HGNC_Symbol, "_", Annotated_Sequence) ) %>%
column_to_rownames("rname") %>%
select(all_of( contains("Xenograft") )) %>%
as.matrix() %>%
t
pY_mat_Batch1[1:5,1:5]
## RET_ADGTNTGFPRyPNDSVYANWMLSPSAAK
## log2FC_Xenograft_EC_24h_2_Set1 -0.07401477
## log2FC_Xenograft_EC_24h_5_Set1 0.21847224
## log2FC_Xenograft_E_24h_4_Set1 0.22874181
## log2FC_Xenograft_ctrl_5d_7_Set1 0.54095417
## log2FC_Xenograft_ctrl_5d_1_Set1 -0.27106564
## RPSA_ADHQPLTEASyVNLPTIALCNTDSPLR PKP3_ADyDTLSLR
## log2FC_Xenograft_EC_24h_2_Set1 -0.5411173 0.64984600
## log2FC_Xenograft_EC_24h_5_Set1 0.3552827 -0.84664124
## log2FC_Xenograft_E_24h_4_Set1 -0.3858904 0.89538395
## log2FC_Xenograft_ctrl_5d_7_Set1 -0.1045738 0.01867478
## log2FC_Xenograft_ctrl_5d_1_Set1 1.0251007 -0.38241878
## ANXA2_AEDGSVIDyELIDQDAR
## log2FC_Xenograft_EC_24h_2_Set1 0.16930587
## log2FC_Xenograft_EC_24h_5_Set1 -0.62007389
## log2FC_Xenograft_E_24h_4_Set1 -0.09273979
## log2FC_Xenograft_ctrl_5d_7_Set1 0.31250121
## log2FC_Xenograft_ctrl_5d_1_Set1 0.30194393
## LYN_AEERPTFDYLQSVLDDFyTATEGQYQQQP
## log2FC_Xenograft_EC_24h_2_Set1 0.4838867
## log2FC_Xenograft_EC_24h_5_Set1 0.2236433
## log2FC_Xenograft_E_24h_4_Set1 -0.5954406
## log2FC_Xenograft_ctrl_5d_7_Set1 0.3808837
## log2FC_Xenograft_ctrl_5d_1_Set1 0.1752776
pY_Batch1_by_cond <- pY_Batch1_form %>%
pivot_longer(cols = contains("Xenograft") , names_to = "sample", values_to = "log2FC_over_ctrl") %>%
mutate(sample = str_remove( sample, "log2FC_Xenograft") ) %>%
separate( sample , into = c("xenograft", "treatment", "timepoint", "replicate", "set"), remove = F) %>%
group_by(Annotated_Sequence, HGNC_Symbol, Master.Protein.Accessions, Master.Protein.Descriptions, xenograft, treatment,
timepoint) %>%
summarise(mean_log2FC_per_xeno_treat_time = mean(log2FC_over_ctrl, na.rm = T)) %>%
ungroup() %>%
mutate(sample_group = paste0("Xenograft4", xenograft, "_" , treatment, "_" , timepoint )) %>%
select(-xenograft, -treatment, -timepoint) %>%
pivot_wider( values_from = mean_log2FC_per_xeno_treat_time, names_from = sample_group)
## `summarise()` has grouped output by 'Annotated_Sequence', 'HGNC_Symbol',
## 'Master.Protein.Accessions', 'Master.Protein.Descriptions', 'xenograft',
## 'treatment'. You can override using the `.groups` argument.
sum((is.na(pY_Batch1_by_cond) %>% rowSums() ) ==0 )
## [1] 231
pY_Batch1_by_cond_noNA <- pY_Batch1_by_cond %>% .[( (is.na(pY_Batch1_by_cond ) %>% rowSums() ) ==0),]
pY_mat_Batch1_by_cond_noNA <- pY_Batch1_by_cond_noNA %>%
mutate( rname = paste0( HGNC_Symbol, "_", Annotated_Sequence) ) %>%
column_to_rownames("rname") %>%
select(all_of( contains("Xenograft") )) %>%
as.matrix() %>%
t
sum((is.na(pY_Batch1_by_cond %>% select('Annotated_Sequence', 'HGNC_Symbol', 'Master.Protein.Accessions', 'Master.Protein.Descriptions', contains("_5d")) ) %>% rowSums() ) ==0 )
## [1] 627
pY_Batch1_by_cond_5d_noNA <- pY_Batch1_by_cond %>% .[( (is.na(pY_Batch1_by_cond %>% select('Annotated_Sequence', 'HGNC_Symbol', 'Master.Protein.Accessions', 'Master.Protein.Descriptions', contains("_5d")) ) %>% rowSums() ) ==0 ), ] %>% select('Annotated_Sequence', 'HGNC_Symbol', 'Master.Protein.Accessions', 'Master.Protein.Descriptions', contains("_5d") )
pY_mat_Batch1_by_cond_5d_noNA <- pY_Batch1_by_cond_5d_noNA %>%
mutate( rname = paste0( HGNC_Symbol, "_", Annotated_Sequence) ) %>%
column_to_rownames("rname") %>%
select(all_of( contains("Xenograft") )) %>%
as.matrix() %>%
t
rm(list=setdiff(ls(), c("pY_mat_Batch1_by_cond_noNA", "pY_Batch1_by_cond_noNA",
"pY_mat_Batch1_by_cond_5d_noNA", "pY_Batch1_by_cond_5d_noNA" ) ))
load("../Data/Cache/Xenografts_Batch2_DataProcessing.RData")
pY_Set5_Xenograft1_form <- pY_Set5_normXenograft1_form %>%
select(Annotated_Sequence, HGNC_Symbol, Master.Protein.Accessions, Master.Protein.Descriptions, contains("log2FC_Xenograft1_"))
pY_Set5_Xenograft14_form <- pY_Set5_normXenograft14_form %>%
select(Annotated_Sequence, HGNC_Symbol, Master.Protein.Accessions, Master.Protein.Descriptions, contains("log2FC_Xenograft14_"))
all_pY_peptides_form <- rbind( (pY_Set1_form %>% select('Annotated_Sequence', 'HGNC_Symbol', 'Master.Protein.Accessions', 'Master.Protein.Descriptions') ),
(pY_Set3_form %>% select('Annotated_Sequence', 'HGNC_Symbol', 'Master.Protein.Accessions', 'Master.Protein.Descriptions') ),
(pY_Set4_form %>% select('Annotated_Sequence', 'HGNC_Symbol', 'Master.Protein.Accessions', 'Master.Protein.Descriptions') ),
(pY_Set5_Xenograft1_form %>% select('Annotated_Sequence', 'HGNC_Symbol', 'Master.Protein.Accessions', 'Master.Protein.Descriptions') ),
(pY_Set5_Xenograft14_form %>% select('Annotated_Sequence', 'HGNC_Symbol', 'Master.Protein.Accessions', 'Master.Protein.Descriptions') ) )%>%
unique
pY_Batch2_form <- plyr::join_all(list(all_pY_peptides_form, pY_Set1_form,
pY_Set3_form,pY_Set4_form,
pY_Set5_Xenograft1_form, pY_Set5_Xenograft14_form ),
by=c('Annotated_Sequence', 'HGNC_Symbol', 'Master.Protein.Accessions', 'Master.Protein.Descriptions' ),
type='left') %>% as_tibble()
#pY_Batch2[((is.na(pY_Batch2) %>% rowSums() ) ==0 ),] %>%
# pivot_longer(cols = contains("Xenograft") , names_to = "sample", values_to = "log2FC_over_ctrl") %>%
# mutate(sample = str_remove( sample, "log2FC_Xenograft") ) %>%
# separate( sample , into = c("xenograft", "treatment", "timepoint", "replicate", "set"), remove = F) %>%
# ggplot(aes(x = interaction(timepoint, treatment, xenograft), y = log2FC_over_ctrl, fill = treatment) ) +
# geom_boxplot() +
# geom_point() +
# theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
# facet_wrap(~ HGNC_Symbol + Annotated_Sequence)
pY_Batch2_form
## # A tibble: 1,380 × 72
## Annotated_Sequence HGNC_Symbol Master.Protein.Acces…¹ Master.Protein.Descr…²
## <chr> <chr> <chr> <chr>
## 1 AAGPGWAAyER IQGAP3 Q86VI3 Ras GTPase-activating…
## 2 AATSDLEHyDKTR NUCB2 P80303 Nucleobindin-2 OS=Hom…
## 3 AATSGVPSIYAPSTyAHL… LSR Q86X29 Lipolysis-stimulated …
## 4 AATSGVPSIyAPSTYAHL… LSR Q86X29 Lipolysis-stimulated …
## 5 AAVPSGASTGIyEALELR… ENO1 P06733 Alpha-enolase OS=Homo…
## 6 AAYFGIyDTAK SLC25A5 P05141 ADP/ATP translocase 2…
## 7 AAyFGIYDTAK SLC25A5 P05141 ADP/ATP translocase 2…
## 8 ACGSSEASAyLDELR TRPM4 Q8TD43 Transient receptor po…
## 9 ACPDSLGSPAPSHAyHGG… ANO1 Q5XXA6 Anoctamin-1 OS=Homo s…
## 10 ACQSIyPLHDVFVR RPS3A P61247 40S ribosomal protein…
## # ℹ 1,370 more rows
## # ℹ abbreviated names: ¹​Master.Protein.Accessions, ²​Master.Protein.Descriptions
## # ℹ 68 more variables: log2FC_Xenograft1_ctrl_5d_1_Set1 <dbl>,
## # log2FC_Xenograft1_ctrl_5d_2_Set1 <dbl>,
## # log2FC_Xenograft1_ctrl_5d_3_Set1 <dbl>,
## # log2FC_Xenograft1_ctrl_5d_4_Set1 <dbl>,
## # log2FC_Xenograft1_ctrl_5d_5_Set1 <dbl>, …
pY_mat_Batch2 <- pY_Batch2_form %>%
mutate( rname = paste0( HGNC_Symbol, "_", Annotated_Sequence) ) %>%
column_to_rownames("rname") %>%
select(all_of( contains("Xenograft") )) %>%
as.matrix() %>%
t
pY_mat_Batch2[1:5,1:5]
## IQGAP3_AAGPGWAAyER NUCB2_AATSDLEHyDKTR
## log2FC_Xenograft1_ctrl_5d_1_Set1 0.22305250 0.6894926
## log2FC_Xenograft1_ctrl_5d_2_Set1 -0.44856667 -0.5706067
## log2FC_Xenograft1_ctrl_5d_3_Set1 -0.09876451 -0.5666204
## log2FC_Xenograft1_ctrl_5d_4_Set1 0.20796365 -0.3686920
## log2FC_Xenograft1_ctrl_5d_5_Set1 -0.19747354 0.2524627
## LSR_AATSGVPSIYAPSTyAHLSPAK
## log2FC_Xenograft1_ctrl_5d_1_Set1 -0.00930291
## log2FC_Xenograft1_ctrl_5d_2_Set1 -0.49322341
## log2FC_Xenograft1_ctrl_5d_3_Set1 0.23001078
## log2FC_Xenograft1_ctrl_5d_4_Set1 0.42668463
## log2FC_Xenograft1_ctrl_5d_5_Set1 -0.29054769
## LSR_AATSGVPSIyAPSTYAHLSPAK
## log2FC_Xenograft1_ctrl_5d_1_Set1 0.20438777
## log2FC_Xenograft1_ctrl_5d_2_Set1 -0.63605823
## log2FC_Xenograft1_ctrl_5d_3_Set1 -0.07443694
## log2FC_Xenograft1_ctrl_5d_4_Set1 0.33563739
## log2FC_Xenograft1_ctrl_5d_5_Set1 -0.07609956
## ENO1_AAVPSGASTGIyEALELRDNDK
## log2FC_Xenograft1_ctrl_5d_1_Set1 -0.8448133
## log2FC_Xenograft1_ctrl_5d_2_Set1 1.7020024
## log2FC_Xenograft1_ctrl_5d_3_Set1 0.7442778
## log2FC_Xenograft1_ctrl_5d_4_Set1 0.9193946
## log2FC_Xenograft1_ctrl_5d_5_Set1 -1.3016477
pY_Batch2_by_cond <- pY_Batch2_form %>%
pivot_longer(cols = contains("Xenograft") , names_to = "sample", values_to = "log2FC_over_ctrl") %>%
mutate(sample = str_remove( sample, "log2FC_Xenograft") ) %>%
separate( sample , into = c("xenograft", "treatment", "timepoint", "replicate", "set"), remove = F) %>%
group_by(Annotated_Sequence, HGNC_Symbol, Master.Protein.Accessions, Master.Protein.Descriptions, xenograft, treatment,
timepoint) %>%
summarise(mean_log2FC_per_xeno_treat_time = mean(log2FC_over_ctrl, na.rm = T)) %>%
ungroup() %>%
mutate(sample_group = paste0("Xenograft", xenograft, "_" , treatment, "_" , timepoint )) %>%
select(-xenograft, -treatment, -timepoint) %>%
pivot_wider( values_from = mean_log2FC_per_xeno_treat_time, names_from = sample_group)
## `summarise()` has grouped output by 'Annotated_Sequence', 'HGNC_Symbol',
## 'Master.Protein.Accessions', 'Master.Protein.Descriptions', 'xenograft',
## 'treatment'. You can override using the `.groups` argument.
sum((is.na(pY_Batch2_by_cond %>% select(-Xenograft14_EC_24h)) %>% rowSums() ) ==0 )
## [1] 294
pY_Batch2_by_cond_noNA <- pY_Batch2_by_cond %>% select(-Xenograft14_EC_24h) %>% .[( (is.na(pY_Batch2_by_cond %>% select(-Xenograft14_EC_24h)) %>% rowSums() ) ==0),]
pY_mat_Batch2_by_cond_noNA <- pY_Batch2_by_cond_noNA %>%
mutate( rname = paste0( HGNC_Symbol, "_", Annotated_Sequence) ) %>%
column_to_rownames("rname") %>%
select(all_of( contains("Xenograft") )) %>%
as.matrix() %>%
t
sum((is.na(pY_Batch2_by_cond %>% select('Annotated_Sequence', 'HGNC_Symbol', 'Master.Protein.Accessions', 'Master.Protein.Descriptions', contains("_5d")) ) %>% rowSums() ) ==0 )
## [1] 294
pY_Batch2_by_cond_5d_noNA <- pY_Batch2_by_cond %>% .[( (is.na(pY_Batch2_by_cond %>% select('Annotated_Sequence', 'HGNC_Symbol', 'Master.Protein.Accessions', 'Master.Protein.Descriptions', contains("_5d")) ) %>% rowSums() ) ==0 ), ] %>% select('Annotated_Sequence', 'HGNC_Symbol', 'Master.Protein.Accessions', 'Master.Protein.Descriptions', contains("_5d") )
pY_mat_Batch2_by_cond_5d_noNA <- pY_Batch2_by_cond_5d_noNA %>%
mutate( rname = paste0( HGNC_Symbol, "_", Annotated_Sequence) ) %>%
column_to_rownames("rname") %>%
select(all_of( contains("Xenograft") )) %>%
as.matrix() %>%
t
rm(list=setdiff(ls(), c("pY_mat_Batch2_by_cond_noNA", "pY_Batch2_by_cond_noNA",
"pY_mat_Batch1_by_cond_noNA", "pY_Batch1_by_cond_noNA",
"pY_mat_Batch1_by_cond_5d_noNA", "pY_Batch1_by_cond_5d_noNA",
"pY_mat_Batch2_by_cond_5d_noNA", "pY_Batch2_by_cond_5d_noNA") ))
string_db <- STRINGdb::STRINGdb$new( version="11.5", species=9606,
score_threshold=400, input_directory="../../../General/Code/Data/")
kinase_substrate_dataset <- read_delim("../../../General/Code/Data/Kinase_Substrate_Dataset", delim = "\t", skip = 2) %>% filter(KIN_ORGANISM == "human")
## Rows: 22608 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (14): GENE, KINASE, KIN_ACC_ID, KIN_ORGANISM, SUBSTRATE, SUB_ACC_ID, SUB...
## dbl (2): SUB_GENE_ID, SITE_GRP_ID
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
kinase_substrate_dataset
## # A tibble: 17,862 × 16
## GENE KINASE KIN_ACC_ID KIN_ORGANISM SUBSTRATE SUB_GENE_ID SUB_ACC_ID
## <chr> <chr> <chr> <chr> <chr> <dbl> <chr>
## 1 EIF2AK1 HRI Q9BQI3 human eIF2-alpha 54318 P68101
## 2 EIF2AK1 HRI Q9BQI3 human eIF2-alpha 100339093 P83268
## 3 EIF2AK1 HRI Q9BQI3 human eIF2-alpha 1965 P05198
## 4 EIF2AK1 HRI Q9BQI3 human eIF2-alpha 1965 P05198
## 5 PRKCD PKCD Q05655 human syndecan-4 24771 P34901
## 6 PRKCD PKCD Q05655 human ANKRD54 223690 Q91WK7
## 7 PRKCD PKCD Q05655 human HDAC5 10014 Q9UQL6
## 8 PRKCD PKCD Q05655 human PTPRA iso2 5786 P18433-2
## 9 PRKCD PKCD Q05655 human Bcl-2 596 P10415
## 10 PRKCD PKCD Q05655 human hnRNP K 3190 P61978
## # ℹ 17,852 more rows
## # ℹ 9 more variables: SUB_GENE <chr>, SUB_ORGANISM <chr>, SUB_MOD_RSD <chr>,
## # SITE_GRP_ID <dbl>, `SITE_+/-7_AA` <chr>, DOMAIN <chr>, IN_VIVO_RXN <chr>,
## # IN_VITRO_RXN <chr>, `CST_CAT#` <chr>
Phosphorylation_site_dataset <- read_delim("../../../General/Code/Data/Phosphorylation_site_dataset", delim = "\t", skip = 2) %>%
filter(ORGANISM == "human")
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 378662 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (8): GENE, PROTEIN, ACC_ID, HU_CHR_LOC, MOD_RSD, ORGANISM, DOMAIN, SITE_...
## dbl (7): SITE_GRP_ID, MW_kD, LT_LIT, MS_LIT, MS_CST, CST_CAT#, Ambiguous_Site
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Regulatory_sites_dataset <- read_delim("../../../General/Code/Data/Regulatory_sites", delim = "\t", skip = 2) %>%
filter(ORGANISM == "human") %>%
filter(str_detect( MOD_RSD, "p") )
## New names:
## • `` -> `...21`
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 19135 Columns: 21
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (15): GENE, PROTEIN, PROT_TYPE, ACC_ID, HU_CHR_LOC, ORGANISM, MOD_RSD, S...
## dbl (5): GENE_ID, SITE_GRP_ID, LT_LIT, MS_LIT, MS_CST
## lgl (1): ...21
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
PtmSigdb <- readxl::read_excel("../../../General/Code/Data/db_ptm.sig.db.all.v2.0.0/data_PTMsigDB_all_sites_v2.0.0.xlsx")
## Warning: Expecting numeric in D66110 / R66110C4: got '2407414-sm;d'
## Warning: Expecting numeric in D66117 / R66117C4: got '3755605-sm;d'
## Warning: Expecting numeric in D66118 / R66118C4: got '3829801-sm;d'
## Warning: Expecting numeric in D66251 / R66251C4: got '2134717-sm;u'
## Warning: Expecting numeric in D66252 / R66252C4: got '2134718-sm;u'
## Warning: Expecting numeric in D66508 / R66508C4: got '1668802-me;u'
## Warning: Expecting numeric in D66525 / R66525C4: got '1668801-me;u'
## Warning: Expecting numeric in D66592 / R66592C4: got '2295400-sm;u'
## Warning: Expecting numeric in D66650 / R66650C4: got '60885600-pa;d'
## Warning: Expecting numeric in D66651 / R66651C4: got '60885601-pa;d'
## Warning: Expecting numeric in D66652 / R66652C4: got '8545503-sm;u'
## Warning: Expecting numeric in D66653 / R66653C4: got '8545504-sm;u'
## Warning: Expecting numeric in D66654 / R66654C4: got '8545505-sm;u'
## Warning: Expecting numeric in D66748 / R66748C4: got '31089521-sm;d'
## Warning: Expecting numeric in D66819 / R66819C4: got '1668802-me;d'
## Warning: Expecting numeric in D67030 / R67030C4: got '50455000-sm;u'
## Warning: Expecting numeric in D67031 / R67031C4: got '50455001-sm;u'
## Warning: Expecting numeric in D67032 / R67032C4: got '50455002-sm;u'
## Warning: Expecting numeric in D67043 / R67043C4: got '31094520-me;u'
## Warning: Expecting numeric in D67056 / R67056C4: got '27581511-ne;d'
## Warning: Expecting numeric in D67057 / R67057C4: got '27581513-ne;d'
## Warning: Expecting numeric in D67080 / R67080C4: got '2535901-sm;d'
## Warning: Expecting numeric in D67086 / R67086C4: got '25092200-sm;d'
## Warning: Expecting numeric in D67087 / R67087C4: got '25092201-sm;d'
## Warning: Expecting numeric in D67107 / R67107C4: got '8545502-sm;d'
## Warning: Expecting numeric in D67128 / R67128C4: got '2586358-me;u'
## Warning: Expecting numeric in D67129 / R67129C4: got '2586359-me;u'
## Warning: Expecting numeric in D67130 / R67130C4: got '2586360-me;u'
## Warning: Expecting numeric in D67505 / R67505C4: got '12723517-sm;u'
## Warning: Expecting numeric in D67506 / R67506C4: got '12723518-sm;u'
## Warning: Expecting numeric in D67511 / R67511C4: got '27850501-sm;d'
all_pY_peptides_form <- rbind( (pY_Batch1_by_cond_noNA %>% select('Annotated_Sequence', 'HGNC_Symbol', 'Master.Protein.Accessions', 'Master.Protein.Descriptions') ),
(pY_Batch2_by_cond_noNA %>% select('Annotated_Sequence', 'HGNC_Symbol', 'Master.Protein.Accessions', 'Master.Protein.Descriptions') ) ) %>%
unique
pY_all_xeno_form <- plyr::join_all(list(all_pY_peptides_form, pY_Batch1_by_cond_noNA,
pY_Batch2_by_cond_noNA),
by=c('Annotated_Sequence', 'HGNC_Symbol', 'Master.Protein.Accessions', 'Master.Protein.Descriptions' ),
type='left') %>% as_tibble()
sum((is.na(pY_all_xeno_form ) %>% rowSums() ) ==0 )
## [1] 111
pY_all_xeno_noNA <- pY_all_xeno_form %>% .[( (is.na(pY_all_xeno_form ) %>% rowSums() ) ==0),]
pY_mat_all_xeno_noNA <- pY_all_xeno_noNA %>%
mutate( rname = paste0( HGNC_Symbol, "_", Annotated_Sequence) ) %>%
column_to_rownames("rname") %>%
select(all_of( contains("Xenograft") )) %>%
as.matrix() %>%
t
all_pY_peptides_5d_form <- rbind( (pY_Batch1_by_cond_5d_noNA %>% select('Annotated_Sequence', 'HGNC_Symbol', 'Master.Protein.Accessions', 'Master.Protein.Descriptions') ),
(pY_Batch2_by_cond_5d_noNA %>% select('Annotated_Sequence', 'HGNC_Symbol', 'Master.Protein.Accessions', 'Master.Protein.Descriptions') ) ) %>%
unique
pY_all_xeno_5d_form <- plyr::join_all(list(all_pY_peptides_5d_form, pY_Batch1_by_cond_5d_noNA,
pY_Batch2_by_cond_5d_noNA),
by=c('Annotated_Sequence', 'HGNC_Symbol', 'Master.Protein.Accessions', 'Master.Protein.Descriptions' ),
type='left') %>% as_tibble()
sum((is.na(pY_all_xeno_5d_form ) %>% rowSums() ) ==0 )
## [1] 178
pY_all_xeno_5d_noNA <- pY_all_xeno_5d_form %>% .[( (is.na(pY_all_xeno_5d_form ) %>% rowSums() ) ==0),]
pY_mat_all_xeno_5d_noNA <- pY_all_xeno_5d_noNA %>%
mutate( rname = paste0( HGNC_Symbol, "_", Annotated_Sequence) ) %>%
column_to_rownames("rname") %>%
select(all_of( contains("Xenograft") )) %>%
as.matrix() %>%
t
# Assay data
assay_data_pY_all <-
pY_all_xeno_noNA %>%
mutate(coln = paste0(HGNC_Symbol, "_", Annotated_Sequence)) %>%
as.data.frame() %>%
column_to_rownames("coln") %>%
select( -Annotated_Sequence, -HGNC_Symbol,
-Master.Protein.Descriptions, -Master.Protein.Accessions) %>%
as.matrix()
# RowData
rowData_pY_all <-
pY_all_xeno_noNA %>%
select(Annotated_Sequence, HGNC_Symbol, Master.Protein.Descriptions, Master.Protein.Accessions) %>%
mutate(coln = paste0(HGNC_Symbol, "_", Annotated_Sequence)) %>%
mutate(ID = paste0(HGNC_Symbol, "_", Annotated_Sequence),
name = paste0(HGNC_Symbol, "_", Annotated_Sequence)) %>%
as.data.frame() %>%
column_to_rownames("coln")
# ColData
colData_pY_all <-
str_split(colnames(assay_data_pY_all #)
), pattern = "_", simplify = TRUE ) %>%
as.data.frame()
rownames(colData_pY_all) <- colnames(assay_data_pY_all)
colnames(colData_pY_all) <- c( "replicate", "condition", "day")
colData_pY_all$label <- colnames(assay_data_pY_all)
colData_pY_all$ID <- colnames(assay_data_pY_all)
pY_all_se <- SummarizedExperiment(assays = assay_data_pY_all,
colData = colData_pY_all,
rowData = rowData_pY_all)
validObject(pY_all_se)
## [1] TRUE
pY_all_se
## class: SummarizedExperiment
## dim: 111 19
## metadata(0):
## assays(1): ''
## rownames(111): PKP3_ADyDTLSLR PKM_AEGSDVANAVLDGADCIMLSGETAKGDyPLEAVR
## ... PTPRA_yVNILPYDHSR LHPP_yYKETSGLMLDVGPYMK
## rowData names(6): Annotated_Sequence HGNC_Symbol ... ID name
## colnames(19): Xenograft4_E_24h Xenograft4_E_5d ... Xenograft14_EC_5d
## Xenograft14_ctrl_5d
## colData names(5): replicate condition day label ID
# Assay data
assay_data_pY_5d <-
pY_all_xeno_5d_noNA %>%
mutate(coln = paste0(HGNC_Symbol, "_", Annotated_Sequence)) %>%
as.data.frame() %>%
column_to_rownames("coln") %>%
select( -Annotated_Sequence, -HGNC_Symbol,
-Master.Protein.Descriptions, -Master.Protein.Accessions) %>%
as.matrix()
# RowData
rowData_pY_5d <-
pY_all_xeno_5d_noNA %>%
select(Annotated_Sequence, HGNC_Symbol, Master.Protein.Descriptions, Master.Protein.Accessions) %>%
mutate(coln = paste0(HGNC_Symbol, "_", Annotated_Sequence)) %>%
mutate(ID = paste0(HGNC_Symbol, "_", Annotated_Sequence),
name = paste0(HGNC_Symbol, "_", Annotated_Sequence)) %>%
as.data.frame() %>%
column_to_rownames("coln")
# ColData
colData_pY_5d <-
str_split(colnames(assay_data_pY_5d #)
), pattern = "_", simplify = TRUE ) %>%
as.data.frame()
rownames(colData_pY_5d) <- colnames(assay_data_pY_5d)
colnames(colData_pY_5d) <- c( "replicate", "condition", "day")
colData_pY_5d$label <- colnames(assay_data_pY_5d)
colData_pY_5d$ID <- colnames(assay_data_pY_5d)
pY_5d_se <- SummarizedExperiment(assays = assay_data_pY_5d,
colData = colData_pY_5d,
rowData = rowData_pY_5d)
validObject(pY_5d_se)
## [1] TRUE
pY_5d_se
## class: SummarizedExperiment
## dim: 178 11
## metadata(0):
## assays(1): ''
## rownames(178): ANK3_ADIVQQLLQQGASPNAATTSGyTPLHLSAR PKP3_ADyDTLSLR ...
## PTPRA_yVNILPYDHSR LHPP_yYKETSGLMLDVGPYMK
## rowData names(6): Annotated_Sequence HGNC_Symbol ... ID name
## colnames(11): Xenograft4_E_5d Xenograft4_EBC_5d ... Xenograft14_EC_5d
## Xenograft14_ctrl_5d
## colData names(5): replicate condition day label ID
# TODO How to deal with peptides which have multiple phosphorylations?
Annotate_pY_sites <- function(all_pY_sites){
if(!all(str_count(all_pY_sites$Annotated_Sequence, "y") == 1)){warning("There are peptides with multiple pY sites")}
tmp_sequence <- paste0("jjjjjjj", all_pY_sites$Annotated_Sequence,"jjjjjjj" )
position_pY_phos <- str_locate(tmp_sequence, "y")[,1]
tmp_sequence <- str_sub(tmp_sequence, (position_pY_phos-7), position_pY_phos + 7)
tmp_sequence <- paste0( str_to_upper(str_sub(tmp_sequence,1,7 )),
str_to_lower(str_sub(tmp_sequence,8,8 )),
str_to_upper(str_sub(tmp_sequence,9,15 )))
tmp_sequence <- str_replace_all(string = tmp_sequence, pattern ="J", replacement = "")
all_pY_sites$Modified_Sequence <- tmp_sequence
Phosphorylation_site_dataset_pY <-
Phosphorylation_site_dataset %>% filter(GENE %in% all_pY_sites$HGNC_Symbol) %>%
filter(!str_detect(PROTEIN, "iso")) %>%
filter( str_detect( `SITE_+/-7_AA`, "y" ) ) %>%
mutate(Modified_Sequence = paste0( str_to_upper(str_sub(`SITE_+/-7_AA`,1,7 )),
str_to_lower(str_sub(`SITE_+/-7_AA`,8,8 )),
str_to_upper(str_sub(`SITE_+/-7_AA`,9,16 ))) ) %>%
select(GENE, PROTEIN, HU_CHR_LOC, MOD_RSD, SITE_GRP_ID, DOMAIN, Modified_Sequence) %>%
unique()
Return_MOD_RSD <- function(HGNC_Symbol, Modified_Sequence){
selprot <- Phosphorylation_site_dataset_pY %>% filter(GENE == HGNC_Symbol)
if(length(str_which(selprot$Modified_Sequence, Modified_Sequence))==1){
selsite <- selprot[ str_which(selprot$Modified_Sequence, Modified_Sequence), ]
return(selsite$MOD_RSD)}else{return("NA")}
}
Return_DOMAIN <- function(HGNC_Symbol, Modified_Sequence){
selprot <- Phosphorylation_site_dataset_pY %>% filter(GENE == HGNC_Symbol)
if(length(str_which(selprot$Modified_Sequence, Modified_Sequence))==1){
selsite <- selprot[ str_which(selprot$Modified_Sequence, Modified_Sequence), ]
return(selsite$DOMAIN)}else{return("NA")}
}
Return_SITE_GRP_ID <- function(HGNC_Symbol, Modified_Sequence){
selprot <- Phosphorylation_site_dataset_pY %>% filter(GENE == HGNC_Symbol)
if(length(str_which(selprot$Modified_Sequence, Modified_Sequence))==1){
selsite <- selprot[ str_which(selprot$Modified_Sequence, Modified_Sequence), ]
return(selsite$SITE_GRP_ID)}else{return("NA")}
}
all_pY_sites$DOMAIN <- apply(all_pY_sites,1, function(x){ Return_DOMAIN(x[1], x[3] )} )
all_pY_sites$MOD_RSD <- apply(all_pY_sites,1, function(x){ Return_MOD_RSD(x[1], x[3] )} )
all_pY_sites$SITE_GRP_ID <- as.double( apply(all_pY_sites,1, function(x){ Return_SITE_GRP_ID(x[1], x[3] )} ) )
return(all_pY_sites)
#all_pY_sites <- left_join(all_pY_sites,
# kinase_substrate_dataset %>% select(GENE, KINASE, SUBSTRATE, SUB_GENE, SUB_MOD_RSD, SITE_GRP_ID),
# by= "SITE_GRP_ID")
#
#all_pY_sites <- left_join(all_pY_sites,
# Regulatory_sites_dataset %>% filter(GENE %in% unique(all_pY_sites$HGNC_Symbol), str_detect(MOD_RSD, "Y") ) ,
# by = c( "HGNC_Symbol" = "GENE", "MOD_RSD" ))
}
all_pY_sites <- bind_rows(
pY_all_xeno_noNA %>% select(HGNC_Symbol, Annotated_Sequence),
pY_all_xeno_5d_noNA %>% select(HGNC_Symbol, Annotated_Sequence)) %>% unique
all_pY_sites <- Annotate_pY_sites(all_pY_sites)
## Warning in Annotate_pY_sites(all_pY_sites): NAs introduced by coercion
print( paste( sum((is.na(pY_Batch1_by_cond_noNA) %>% rowSums() ) ==0 ) , "pY peptides passed the filtering procedure for the sets combined and the values are summed accross conditions" ))
## [1] "231 pY peptides passed the filtering procedure for the sets combined and the values are summed accross conditions"
print( paste( sum((is.na(pY_Batch2_by_cond_noNA) %>% rowSums() ) ==0 ) , "pY peptides passed the filtering procedure for the sets combined when Xenograft14_EC_24h is filtered out and the values are summed accross conditions" ))
## [1] "294 pY peptides passed the filtering procedure for the sets combined when Xenograft14_EC_24h is filtered out and the values are summed accross conditions"
print( paste( sum((is.na(pY_all_xeno_noNA) %>% rowSums() ) ==0 ) , "pY peptides passed the filtering procedure for the batches combined" ))
## [1] "111 pY peptides passed the filtering procedure for the batches combined"
print( paste( sum((is.na(pY_all_xeno_5d_noNA) %>% rowSums() ) ==0 ) , "pY peptides passed the filtering procedure for the batches combined" ))
## [1] "178 pY peptides passed the filtering procedure for the batches combined"
pY_mat_all_xeno_noNA %>%
prcomp() %>%
summary() %>%
.$importance %>% t() %>% as.data.frame() %>%
rownames_to_column("PC") %>%
as_tibble() %>%
mutate(PC = as.factor(PC)) %>%
mutate(PC = factor(PC, levels = PC)) %>%
ggplot(aes(PC, `Proportion of Variance` )) +
geom_point() +
theme(axis.text.x = element_text(angle = 90))
PCA_pY <- pY_mat_all_xeno_noNA %>%
prcomp() %>%
.$x %>%
as.data.frame() %>%
rownames_to_column("sample")
PCApY_rot <- pY_mat_all_xeno_noNA %>%
prcomp() %>%
.$rotation
PCApY_rot <- bind_cols(PCApY_rot,
(pY_all_xeno_noNA %>% select(HGNC_Symbol, Annotated_Sequence) ) ) %>% arrange(desc(PC1) )
PCA_pY <- PCA_pY %>%
mutate(sample= str_remove(sample,"Xenograft")) %>%
separate( sample , into = c("xenograft", "treatment", "day"), sep = "_")
PCA12_pY <- PCA_pY %>%
ggplot(aes(PC1, PC2, color = treatment, shape = day )) +
geom_point(size = 3) +
theme_classic() +
scale_colour_manual(values=PGPalette[c(5,1,4,2)]) +
geom_text(aes(label = xenograft), color = "black", nudge_x = 1) +
ggtitle("pY")
PCA34_pY <- PCA_pY %>%
ggplot(aes(PC3, PC4, color = treatment, shape = day )) +
geom_point(size = 3) +
theme_classic() +
scale_colour_manual(values=PGPalette[c(5,1,4,2)]) +
geom_text(aes(label = xenograft), color = "black", nudge_x = 1) +
ggtitle("pY")
ggpubr::ggarrange(PCA12_pY, PCA34_pY)
PCA12_pY <- PCA_pY %>%
ggplot(aes(PC1, PC2, color = day, shape = day )) +
geom_point(size = 3) +
theme_classic() +
scale_colour_manual(values=PGPalette[c(5,1,4,2)]) +
geom_text(aes(label = xenograft), color = "black", nudge_x = 1) +
ggtitle("pY")
PCA34_pY <- PCA_pY %>%
ggplot(aes(PC3, PC4, color = day, shape = day )) +
geom_point(size = 3) +
theme_classic() +
scale_colour_manual(values=PGPalette[c(5,1,4,2)]) +
geom_text(aes(label = xenograft), color = "black", nudge_x = 1) +
ggtitle("pY")
ggpubr::ggarrange(PCA12_pY, PCA34_pY)
PCA12_pY <- PCA_pY %>%
ggplot(aes(PC1, PC2, color = xenograft, shape = day )) +
geom_point(size = 3) +
theme_classic() +
scale_colour_manual(values=PGPalette[c(5,1,4,2)]) +
geom_text(aes(label = xenograft), color = "black", nudge_x = 1) +
ggtitle("pY")
PCA34_pY <- PCA_pY %>%
ggplot(aes(PC3, PC4, color = xenograft, shape = day )) +
geom_point(size = 3) +
theme_classic() +
scale_colour_manual(values=PGPalette[c(5,1,4,2)]) +
geom_text(aes(label = xenograft), color = "black", nudge_x = 1) +
ggtitle("pY")
ggpubr::ggarrange(PCA12_pY, PCA34_pY)
pY_mat_all_xeno_noNA[str_detect( rownames(pY_mat_all_xeno_noNA), "5d") | str_detect( rownames(pY_mat_all_xeno_noNA), "ctrl"),] %>%
prcomp() %>%
summary() %>%
.$importance %>% t() %>% as.data.frame() %>%
rownames_to_column("PC") %>%
as_tibble() %>%
mutate(PC = as.factor(PC)) %>%
mutate(PC = factor(PC, levels = PC)) %>%
ggplot(aes(PC, `Proportion of Variance` )) +
geom_point() +
theme(axis.text.x = element_text(angle = 90))
PCA_pY <- pY_mat_all_xeno_noNA[str_detect( rownames(pY_mat_all_xeno_noNA), "5d") | str_detect( rownames(pY_mat_all_xeno_noNA), "ctrl"),] %>%
prcomp() %>%
.$x %>%
as.data.frame() %>%
rownames_to_column("sample")
PCApY_rot <- pY_mat_all_xeno_noNA[str_detect( rownames(pY_mat_all_xeno_noNA), "5d") | str_detect( rownames(pY_mat_all_xeno_noNA), "ctrl"),] %>%
prcomp() %>%
.$rotation
PCApY_rot <- bind_cols(PCApY_rot,
(pY_all_xeno_noNA %>% select(HGNC_Symbol, Annotated_Sequence) ) ) %>% arrange(desc(PC1) )
PCA_pY <- PCA_pY %>%
mutate(sample= str_remove(sample,"Xenograft")) %>%
separate( sample , into = c("xenograft", "treatment", "day"), sep = "_")
PCA12_pY <- PCA_pY %>%
ggplot(aes(PC1, PC2, color = treatment, shape = day )) +
geom_point(size = 3) +
theme_classic() +
scale_colour_manual(values=PGPalette[c(5,1,4,2)]) +
geom_text(aes(label = xenograft), color = "black", nudge_x = 1) +
ggtitle("pY")
PCA34_pY <- PCA_pY %>%
ggplot(aes(PC3, PC4, color = treatment, shape = day )) +
geom_point(size = 3) +
theme_classic() +
scale_colour_manual(values=PGPalette[c(5,1,4,2)]) +
geom_text(aes(label = xenograft), color = "black", nudge_x = 1) +
ggtitle("pY")
ggpubr::ggarrange(PCA12_pY, PCA34_pY)
PCA12_pY <- PCA_pY %>%
ggplot(aes(PC1, PC2, color = day, shape = day )) +
geom_point(size = 3) +
theme_classic() +
scale_colour_manual(values=PGPalette[c(5,1,4,2)]) +
geom_text(aes(label = xenograft), color = "black", nudge_x = 1) +
ggtitle("pY")
PCA34_pY <- PCA_pY %>%
ggplot(aes(PC3, PC4, color = day, shape = day )) +
geom_point(size = 3) +
theme_classic() +
scale_colour_manual(values=PGPalette[c(5,1,4,2)]) +
geom_text(aes(label = xenograft), color = "black", nudge_x = 1) +
ggtitle("pY")
ggpubr::ggarrange(PCA12_pY, PCA34_pY)
PCA12_pY <- PCA_pY %>%
ggplot(aes(PC1, PC2, color = xenograft, shape = day )) +
geom_point(size = 3) +
theme_classic() +
scale_colour_manual(values=PGPalette[c(5,1,4,2)]) +
geom_text(aes(label = xenograft), color = "black", nudge_x = 1) +
ggtitle("pY")
PCA34_pY <- PCA_pY %>%
ggplot(aes(PC3, PC4, color = xenograft, shape = day )) +
geom_point(size = 3) +
theme_classic() +
scale_colour_manual(values=PGPalette[c(5,1,4,2)]) +
geom_text(aes(label = xenograft), color = "black", nudge_x = 1) +
ggtitle("pY")
ggpubr::ggarrange(PCA12_pY, PCA34_pY)
pY_mat_all_xeno_5d_noNA %>%
prcomp() %>%
summary() %>%
.$importance %>% t() %>% as.data.frame() %>%
rownames_to_column("PC") %>%
as_tibble() %>%
mutate(PC = as.factor(PC)) %>%
mutate(PC = factor(PC, levels = PC)) %>%
ggplot(aes(PC, `Proportion of Variance` )) +
geom_point() +
theme(axis.text.x = element_text(angle = 90))
PCA_pY <- pY_mat_all_xeno_5d_noNA %>%
prcomp() %>%
.$x %>%
as.data.frame() %>%
rownames_to_column("sample")
PCApY_rot <- pY_mat_all_xeno_5d_noNA %>%
prcomp() %>%
.$rotation
PCApY_rot <- bind_cols(PCApY_rot,
(pY_all_xeno_5d_noNA %>% select(HGNC_Symbol, Annotated_Sequence) ) ) %>% arrange(desc(PC1) )
PCA_pY <- PCA_pY %>%
mutate(sample= str_remove(sample,"Xenograft")) %>%
separate( sample , into = c("xenograft", "treatment", "day"), sep = "_")
PCA12_pY <- PCA_pY %>%
ggplot(aes(PC1, PC2, color = treatment, shape = day )) +
geom_point(size = 3) +
theme_classic() +
scale_colour_manual(values=PGPalette[c(5,1,4,2)]) +
geom_text(aes(label = xenograft), color = "black", nudge_x = 1) +
ggtitle("pY")
PCA34_pY <- PCA_pY %>%
ggplot(aes(PC3, PC4, color = treatment, shape = day )) +
geom_point(size = 3) +
theme_classic() +
scale_colour_manual(values=PGPalette[c(5,1,4,2)]) +
geom_text(aes(label = xenograft), color = "black", nudge_x = 1) +
ggtitle("pY")
ggpubr::ggarrange(PCA12_pY, PCA34_pY)
PCA12_pY <- PCA_pY %>%
ggplot(aes(PC1, PC2, color = day, shape = day )) +
geom_point(size = 3) +
theme_classic() +
scale_colour_manual(values=PGPalette[c(5,1,4,2)]) +
geom_text(aes(label = xenograft), color = "black", nudge_x = 1) +
ggtitle("pY")
PCA34_pY <- PCA_pY %>%
ggplot(aes(PC3, PC4, color = day, shape = day )) +
geom_point(size = 3) +
theme_classic() +
scale_colour_manual(values=PGPalette[c(5,1,4,2)]) +
geom_text(aes(label = xenograft), color = "black", nudge_x = 1) +
ggtitle("pY")
ggpubr::ggarrange(PCA12_pY, PCA34_pY)
PCA12_pY <- PCA_pY %>%
ggplot(aes(PC1, PC2, color = xenograft, shape = day )) +
geom_point(size = 3) +
theme_classic() +
scale_colour_manual(values=PGPalette[c(5,1,4,2)]) +
geom_text(aes(label = xenograft), color = "black", nudge_x = 1) +
ggtitle("pY")
PCA34_pY <- PCA_pY %>%
ggplot(aes(PC3, PC4, color = xenograft, shape = day )) +
geom_point(size = 3) +
theme_classic() +
scale_colour_manual(values=PGPalette[c(5,1,4,2)]) +
geom_text(aes(label = xenograft), color = "black", nudge_x = 1) +
ggtitle("pY")
ggpubr::ggarrange(PCA12_pY, PCA34_pY)
data_diff_E_vs_ctrl_pY <- test_diff(pY_all_se, type="manual", test = "E_vs_ctrl")
## Tested contrasts: E_vs_ctrl
## Warning in fdrtool::fdrtool(res$t, plot = FALSE, verbose = FALSE): There may be
## too few input test statistics for reliable FDR calculations!
dep_E_vs_ctrl_pY <- add_rejections_SH(data_diff_E_vs_ctrl_pY, alpha = 0.05, lfc = log2(1.2))
GGPlotly_Volcano(dep_E_vs_ctrl_pY, contrast = "E_vs_ctrl",
add_names = TRUE,
additional_title = "pY")
Return_DEP_Hits_Plots(data = pY_all_xeno_form, dep_E_vs_ctrl_pY, comparison = "E_vs_ctrl_diff")
## 'select()' returned 1:1 mapping between keys and columns
## Loading required namespace: reactome.db
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## character(0)
## Note: Row-scaling applied for this heatmap
data_diff_EC_vs_ctrl_pY <- test_diff(pY_all_se, type="manual", test = "EC_vs_ctrl")
## Tested contrasts: EC_vs_ctrl
## Warning in fdrtool::fdrtool(res$t, plot = FALSE, verbose = FALSE): There may be
## too few input test statistics for reliable FDR calculations!
dep_EC_vs_ctrl_pY <- add_rejections_SH(data_diff_EC_vs_ctrl_pY, alpha = 0.05, lfc = log2(1.2))
GGPlotly_Volcano(dep_EC_vs_ctrl_pY, contrast = "EC_vs_ctrl",
add_names = TRUE,
additional_title = "pY")
Return_DEP_Hits_Plots(data = pY_all_xeno_form, dep_EC_vs_ctrl_pY, comparison = "EC_vs_ctrl_diff")
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## character(0)
Plot_Enrichment_Single_Pathway(dep_EC_vs_ctrl_pY, comparison = "EC_vs_ctrl_diff",
pw = "Epigenetic regulation of gene expression")
data_diff_EBC_vs_ctrl_pY <- test_diff(pY_all_se, type="manual", test = "EBC_vs_ctrl")
## Tested contrasts: EBC_vs_ctrl
## Warning in fdrtool::fdrtool(res$t, plot = FALSE, verbose = FALSE): There may be
## too few input test statistics for reliable FDR calculations!
dep_EBC_vs_ctrl_pY <- add_rejections_SH(data_diff_EBC_vs_ctrl_pY, alpha = 0.05, lfc = log2(1.2))
GGPlotly_Volcano(dep_EBC_vs_ctrl_pY, contrast = "EBC_vs_ctrl",
add_names = TRUE,
additional_title = "pY")
Return_DEP_Hits_Plots(data = pY_all_xeno_form, dep_EBC_vs_ctrl_pY, comparison = "EBC_vs_ctrl_diff")
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## character(0)
## Warning in max(screen_pval05_pos[, logFcColStr]): no non-missing arguments to
## max; returning -Inf
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning in min(cs1s, na.rm = TRUE): no non-missing arguments to min; returning
## Inf
## Warning in max(cs1s, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in min(cs2s, na.rm = TRUE): no non-missing arguments to min; returning
## Inf
## Warning in max(cs2s, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in min(cs3s, na.rm = TRUE): no non-missing arguments to min; returning
## Inf
## Warning in max(cs3s, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Note: Row-scaling applied for this heatmap
data_diff_EC_vs_E_pY <- test_diff(pY_all_se, type = "manual",
test = c("EC_vs_E"))
## Tested contrasts: EC_vs_E
## Warning in fdrtool::fdrtool(res$t, plot = FALSE, verbose = FALSE): There may be
## too few input test statistics for reliable FDR calculations!
dep_EC_vs_E_pY <- add_rejections_SH(data_diff_EC_vs_E_pY, alpha = 0.05, lfc = log2(1.2))
GGPlotly_Volcano(dep_EC_vs_E_pY, contrast = "EC_vs_E", add_names = TRUE, additional_title = "pY", proteins_of_interest = "EGFR")
Return_DEP_Hits_Plots(data = pY_all_xeno_form, dep_EC_vs_E_pY, comparison = "EC_vs_E_diff")
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## [1] "Signal attenuation" "Cytokine Signaling in Immune system"
## [3] "RAF-independent MAPK1/3 activation"
## Note: Row-scaling applied for this heatmap
#data_results <- get_df_long(dep)
data_diff_EBC_vs_EC_pY <- test_diff(pY_all_se, type = "manual",
test = c("EBC_vs_EC"))
## Tested contrasts: EBC_vs_EC
## Warning in fdrtool::fdrtool(res$t, plot = FALSE, verbose = FALSE): There may be
## too few input test statistics for reliable FDR calculations!
dep_EBC_vs_EC_pY <- add_rejections_SH(data_diff_EBC_vs_EC_pY, alpha = 0.05, lfc = log2(1.2))
GGPlotly_Volcano(dep_EBC_vs_EC_pY, contrast = "EBC_vs_EC", add_names = TRUE, additional_title = "pY")
Return_DEP_Hits_Plots(data = pY_all_xeno_form, dep_EBC_vs_EC_pY, comparison = "EBC_vs_EC_diff")
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## character(0)
#data_results <- get_df_long(dep)
data_diff_E_vs_ctrl_pY <- test_diff(pY_all_se[,pY_all_se$day == "5d"], type="manual", test = "E_vs_ctrl")
## Tested contrasts: E_vs_ctrl
## Warning in fdrtool::fdrtool(res$t, plot = FALSE, verbose = FALSE): There may be
## too few input test statistics for reliable FDR calculations!
dep_E_vs_ctrl_pY <- add_rejections_SH(data_diff_E_vs_ctrl_pY, alpha = 0.05, lfc = log2(1.2))
GGPlotly_Volcano(dep_E_vs_ctrl_pY, contrast = "E_vs_ctrl",
add_names = TRUE,
additional_title = "pY")
rowData(dep_E_vs_ctrl_pY) %>% as_tibble() %>% select(HGNC_Symbol, E_vs_ctrl_diff) %>% write.table("../Data/Kinase_enrichment/AllXenograftsCombined_E_vs_ctrl_pY_forstring.txt", quote = F, row.names = F, col.names = F)
Return_DEP_Hits_Plots(data = pY_all_xeno_form, dep_E_vs_ctrl_pY, comparison = "E_vs_ctrl_diff")
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## character(0)
GSEA_E_vs_ctrl <- Run_GSEA(DEP_result = dep_E_vs_ctrl_pY, comparison = "E_vs_ctrl_diff", return_df = T)
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## character(0)
GSEA_E_vs_ctrl %>% as_tibble() %>% filter(padj < 0.05) %>% arrange(desc(NES))
## # A tibble: 0 × 8
## # ℹ 8 variables: pathway <chr>, pval <dbl>, padj <dbl>, log2err <dbl>,
## # ES <dbl>, NES <dbl>, size <int>, leadingEdge <list>
GSEA_E_vs_ctrl_PTM <- Run_GSEA(DEP_result = dep_E_vs_ctrl_pY, comparison = "E_vs_ctrl_diff", return_df = T,
ptmGSEA_site_df = all_pY_sites, PtmSigdb = PtmSigdb, ptmGSEA = T)
## Joining with `by = join_by(HGNC_Symbol, Annotated_Sequence)`
## [1] "PATH-NP_EGFR1_PATHWAY"
GSEA_E_vs_ctrl_PTM %>% as_tibble() %>% filter(padj < 0.05) %>% arrange(desc(NES))
## # A tibble: 1 × 8
## pathway pval padj log2err ES NES size leadingEdge
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <list>
## 1 PATH-NP_EGFR1_PATHWAY 0.000104 0.0284 0.538 0.636 1.92 41 <chr [18]>
Run_GSEA(DEP_result = dep_E_vs_ctrl_pY, comparison = "E_vs_ctrl_diff", return_df = T,
ptmGSEA_site_df = all_pY_sites, PtmSigdb = PtmSigdb, ptmGSEA = T, single_pathway = "PATH-NP_EGFR1_PATHWAY")
## Joining with `by = join_by(HGNC_Symbol, Annotated_Sequence)`
## [1] "PATH-NP_EGFR1_PATHWAY" "PERT-PSP_HGF"
## [3] "PERT-PSP_PAR1_ACTIVATING_PEPTIDE"
## Joining with `by = join_by(SITE_GRP_ID)`
## # A tibble: 47 × 4
## HGNC_Symbol Annotated_Sequence MOD_RSD FC
## <chr> <chr> <chr> <dbl>
## 1 CDK1 IGEGTYGVVyKGR Y19-p 1.41
## 2 CTNND1 HYEDGYPGGSDNyGSLSR Y228-p 1.24
## 3 PTPN11 VyENVGLMQQQK Y580-p 1.04
## 4 PXN VGEEEHVySFPNK Y118-p 1.01
## 5 PKP4 STTNyVDFYSTK Y1168-p 0.998
## 6 MAPK3 IADPEHDHTGFLTEyVATR Y204-p 0.965
## 7 MAPK3 IADPEHDHTGFLtEyVATR Y204-p 0.965
## 8 PRKCD RSDSASSEPVGIyQGFEK Y313-p 0.904
## 9 PRKCD SDSASSEPVGIyQGFEK Y313-p 0.904
## 10 PRKCD SDSASSEPVGIyQGFEKK Y313-p 0.904
## # ℹ 37 more rows
Run_GSEA(DEP_result = dep_E_vs_ctrl_pY, comparison = "E_vs_ctrl_diff", return_df = T,
ptmGSEA_site_df = all_pY_sites, PtmSigdb = PtmSigdb, ptmGSEA = T, single_pathway = "PATH-NP_KIT_RECEPTOR_PATHWAY")
## Joining with `by = join_by(HGNC_Symbol, Annotated_Sequence)`
## [1] "PATH-NP_EGFR1_PATHWAY"
## Joining with `by = join_by(SITE_GRP_ID)`
## # A tibble: 4 × 4
## HGNC_Symbol Annotated_Sequence MOD_RSD FC
## <chr> <chr> <chr> <dbl>
## 1 MAPK3 IADPEHDHTGFLTEyVATR Y204-p 0.965
## 2 MAPK3 IADPEHDHTGFLtEyVATR Y204-p 0.965
## 3 MAPK1 VADPDHDHTGFLTEyVATR Y187-p 0.872
## 4 MAPK1 VADPDHDHTGFLtEyVATR Y187-p 0.872
data_diff_EC_vs_ctrl_pY <- test_diff(pY_all_se[,pY_all_se$day == "5d"], type="manual", test = "EC_vs_ctrl")
## Tested contrasts: EC_vs_ctrl
## Warning in fdrtool::fdrtool(res$t, plot = FALSE, verbose = FALSE): There may be
## too few input test statistics for reliable FDR calculations!
dep_EC_vs_ctrl_pY <- add_rejections_SH(data_diff_EC_vs_ctrl_pY, alpha = 0.05, lfc = log2(1.2))
GGPlotly_Volcano(dep_EC_vs_ctrl_pY, contrast = "EC_vs_ctrl",
add_names = TRUE,
additional_title = "pY")
Return_DEP_Hits_Plots(data = pY_all_xeno_form, dep_EC_vs_ctrl_pY, comparison = "EC_vs_ctrl_diff")
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## character(0)
## Note: Row-scaling applied for this heatmap
Plot_Enrichment_Single_Pathway(dep_EC_vs_ctrl_pY, comparison = "EC_vs_ctrl_diff",
pw = "Epigenetic regulation of gene expression")
data_diff_EBC_vs_ctrl_pY <- test_diff(pY_all_se[,pY_all_se$day == "5d"], type="manual", test = "EBC_vs_ctrl")
## Tested contrasts: EBC_vs_ctrl
## Warning in fdrtool::fdrtool(res$t, plot = FALSE, verbose = FALSE): There may be
## too few input test statistics for reliable FDR calculations!
dep_EBC_vs_ctrl_pY <- add_rejections_SH(data_diff_EBC_vs_ctrl_pY, alpha = 0.05, lfc = log2(1.2))
GGPlotly_Volcano(dep_EBC_vs_ctrl_pY, contrast = "EBC_vs_ctrl",
add_names = TRUE,
additional_title = "pY")
Return_DEP_Hits_Plots(data = pY_all_xeno_form, dep_EBC_vs_ctrl_pY, comparison = "EBC_vs_ctrl_diff")
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## character(0)
## Warning in max(screen_pval05_pos[, logFcColStr]): no non-missing arguments to
## max; returning -Inf
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning in min(cs1s, na.rm = TRUE): no non-missing arguments to min; returning
## Inf
## Warning in max(cs1s, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in min(cs2s, na.rm = TRUE): no non-missing arguments to min; returning
## Inf
## Warning in max(cs2s, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in min(cs3s, na.rm = TRUE): no non-missing arguments to min; returning
## Inf
## Warning in max(cs3s, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Note: Row-scaling applied for this heatmap
data_diff_EC_vs_E_pY <- test_diff(pY_all_se[,pY_all_se$day == "5d"], type = "manual",
test = c("EC_vs_E"))
## Tested contrasts: EC_vs_E
## Warning in fdrtool::fdrtool(res$t, plot = FALSE, verbose = FALSE): There may be
## too few input test statistics for reliable FDR calculations!
dep_EC_vs_E_pY <- add_rejections_SH(data_diff_EC_vs_E_pY, alpha = 0.05, lfc = log2(1.2))
GGPlotly_Volcano(dep_EC_vs_E_pY, contrast = "EC_vs_E", add_names = TRUE, additional_title = "pY", proteins_of_interest = "EGFR")
Return_DEP_Hits_Plots(data = pY_all_xeno_form, dep_EC_vs_E_pY, comparison = "EC_vs_E_diff")
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## character(0)
## Note: Row-scaling applied for this heatmap
#data_results <- get_df_long(dep)
data_diff_EBC_vs_EC_pY <- test_diff(pY_all_se[,pY_all_se$day == "5d"], type = "manual",
test = c("EBC_vs_EC"))
## Tested contrasts: EBC_vs_EC
## Warning in fdrtool::fdrtool(res$t, plot = FALSE, verbose = FALSE): There may be
## too few input test statistics for reliable FDR calculations!
dep_EBC_vs_EC_pY <- add_rejections_SH(data_diff_EBC_vs_EC_pY, alpha = 0.05, lfc = log2(1.2))
GGPlotly_Volcano(dep_EBC_vs_EC_pY, contrast = "EBC_vs_EC", add_names = TRUE, additional_title = "pY")
Return_DEP_Hits_Plots(data = pY_all_xeno_form, dep_EBC_vs_EC_pY, comparison = "EBC_vs_EC_diff")
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## character(0)
#data_results <- get_df_long(dep)
data_diff_E_vs_ctrl_pY <- test_diff(pY_5d_se, type="manual", test = "E_vs_ctrl")
## Tested contrasts: E_vs_ctrl
## Warning in fdrtool::fdrtool(res$t, plot = FALSE, verbose = FALSE): There may be
## too few input test statistics for reliable FDR calculations!
dep_E_vs_ctrl_pY <- add_rejections_SH(data_diff_E_vs_ctrl_pY, alpha = 0.05, lfc = log2(1.2))
GGPlotly_Volcano(dep_E_vs_ctrl_pY, contrast = "E_vs_ctrl",
add_names = TRUE,
additional_title = "pY")
Return_DEP_Hits_Plots(data = pY_all_xeno_5d_form, dep_E_vs_ctrl_pY, comparison = "E_vs_ctrl_diff")
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## [1] "Signaling by Receptor Tyrosine Kinases"
## [2] "RHO GTPases Activate NADPH Oxidases"
## [3] "GAB1 signalosome"
## [4] "Estrogen-dependent nuclear events downstream of ESR-membrane signaling"
## [5] "Golgi Cisternae Pericentriolar Stack Reorganization"
## [6] "Negative regulation of FGFR1 signaling"
GSEA_E_vs_ctrl <- Run_GSEA(DEP_result = dep_E_vs_ctrl_pY, comparison = "E_vs_ctrl_diff", return_df = T)
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## [1] "Signaling by Receptor Tyrosine Kinases"
## [2] "RHO GTPases Activate NADPH Oxidases"
## [3] "GAB1 signalosome"
## [4] "Estrogen-dependent nuclear events downstream of ESR-membrane signaling"
## [5] "Golgi Cisternae Pericentriolar Stack Reorganization"
## [6] "Negative regulation of FGFR1 signaling"
GSEA_E_vs_ctrl %>% as_tibble() %>% filter(padj < 0.05) %>% arrange(desc(NES))
## # A tibble: 13 × 8
## pathway pval padj log2err ES NES size leadingEdge
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <list>
## 1 Signaling by Receptor T… 6.86e-5 0.0221 0.538 0.643 1.93 32 <chr [15]>
## 2 Signal Transduction 1.51e-4 0.0221 0.519 0.579 1.84 59 <chr [19]>
## 3 G alpha (q) signalling … 5.02e-4 0.0394 0.477 0.815 1.81 7 <chr [5]>
## 4 RHO GTPases Activate NA… 1.81e-4 0.0221 0.519 0.946 1.79 4 <chr [3]>
## 5 RAF-independent MAPK1/3… 6.67e-4 0.0483 0.477 0.889 1.79 5 <chr [4]>
## 6 GAB1 signalosome 3.96e-4 0.0373 0.498 0.934 1.77 4 <chr [3]>
## 7 Estrogen-dependent nucl… 4.78e-4 0.0394 0.498 0.931 1.76 4 <chr [4]>
## 8 Golgi Cisternae Pericen… 2.11e-4 0.0221 0.519 0.969 1.69 3 <chr [3]>
## 9 Negative regulation of … 2.11e-4 0.0221 0.519 0.969 1.69 3 <chr [3]>
## 10 Negative regulation of … 2.11e-4 0.0221 0.519 0.969 1.69 3 <chr [3]>
## 11 Negative regulation of … 2.11e-4 0.0221 0.519 0.969 1.69 3 <chr [3]>
## 12 Negative regulation of … 2.11e-4 0.0221 0.519 0.969 1.69 3 <chr [3]>
## 13 Spry regulation of FGF … 2.11e-4 0.0221 0.519 0.969 1.69 3 <chr [3]>
GSEA_E_vs_ctrl_PTM <- Run_GSEA(DEP_result = dep_E_vs_ctrl_pY, comparison = "E_vs_ctrl_diff", return_df = T,
ptmGSEA_site_df = all_pY_sites, PtmSigdb = PtmSigdb, ptmGSEA = T)
## Joining with `by = join_by(HGNC_Symbol, Annotated_Sequence)`
## [1] "PERT-PSP_VANADATE" "PATH-NP_EGFR1_PATHWAY"
## [3] "PERT-PSP_IL_2" "PERT-PSP_HGF"
## [5] "PERT-PSP_PAR1_ACTIVATING_PEPTIDE"
GSEA_E_vs_ctrl_PTM %>% as_tibble() %>% filter(padj < 0.05) %>% arrange(desc(NES))
## # A tibble: 14 × 8
## pathway pval padj log2err ES NES size leadingEdge
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <list>
## 1 PERT-PSP_PHORBOL_ESTER 4.78e-5 0.00651 0.557 0.932 1.86 5 <chr [4]>
## 2 PERT-PSP_VANADATE 5.50e-5 0.00651 0.557 0.930 1.85 5 <chr [4]>
## 3 PATH-NP_EGFR1_PATHWAY 1.15e-4 0.00811 0.538 0.571 1.85 53 <chr [19]>
## 4 PERT-PSP_THROMBIN 1.36e-4 0.00811 0.519 0.879 1.83 6 <chr [5]>
## 5 PATH-NP_KIT_RECEPTOR_P… 6.55e-5 0.00651 0.538 0.957 1.80 4 <chr [4]>
## 6 PERT-PSP_ANTI_CD3 1.60e-3 0.0341 0.455 0.752 1.76 9 <chr [5]>
## 7 PERT-PSP_ERLOTINIB 1.36e-3 0.0313 0.455 0.894 1.68 4 <chr [4]>
## 8 PERT-PSP_IL_2 1.36e-3 0.0313 0.455 0.894 1.68 4 <chr [4]>
## 9 PERT-PSP_HGF 8.85e-4 0.0313 0.477 0.957 1.67 3 <chr [3]>
## 10 PERT-PSP_PAR1_ACTIVATI… 8.85e-4 0.0313 0.477 0.957 1.67 3 <chr [3]>
## 11 PERT-PSP_SPHINGOSINE_1… 8.85e-4 0.0313 0.477 0.957 1.67 3 <chr [3]>
## 12 PERT-PSP_CYCLIC_STRETCH 1.27e-3 0.0313 0.455 0.951 1.66 3 <chr [3]>
## 13 PERT-PSP_FIBRONECTIN 1.27e-3 0.0313 0.455 0.951 1.66 3 <chr [3]>
## 14 PERT-PSP_RGD 1.27e-3 0.0313 0.455 0.951 1.66 3 <chr [3]>
Run_GSEA(DEP_result = dep_E_vs_ctrl_pY, comparison = "E_vs_ctrl_diff", return_df = T,
ptmGSEA_site_df = all_pY_sites, PtmSigdb = PtmSigdb, ptmGSEA = T, single_pathway = "PATH-NP_EGFR1_PATHWAY")
## Joining with `by = join_by(HGNC_Symbol, Annotated_Sequence)`
## [1] "PATH-NP_EGFR1_PATHWAY" "PERT-PSP_ERLOTINIB"
## [3] "PERT-PSP_IL_2" "PERT-PSP_HGF"
## [5] "PERT-PSP_PAR1_ACTIVATING_PEPTIDE"
## Joining with `by = join_by(SITE_GRP_ID)`
## # A tibble: 59 × 4
## HGNC_Symbol Annotated_Sequence MOD_RSD FC
## <chr> <chr> <chr> <dbl>
## 1 CDK1 IGEGTYGVVyKGR Y19-p 1.41
## 2 CTNND1 HYEDGYPGGSDNyGSLSR Y228-p 1.24
## 3 PRKCD RSDSASSEPVGIyQGFEK Y313-p 1.22
## 4 PRKCD SDSASSEPVGIyQGFEK Y313-p 1.22
## 5 PRKCD SDSASSEPVGIyQGFEKK Y313-p 1.22
## 6 PTPN11 VyENVGLMQQQK Y580-p 1.04
## 7 PXN VGEEEHVySFPNK Y118-p 1.01
## 8 PKP4 STTNyVDFYSTK Y1168-p 0.998
## 9 MAPK3 IADPEHDHTGFLTEyVATR Y204-p 0.965
## 10 MAPK3 IADPEHDHTGFLtEyVATR Y204-p 0.965
## # ℹ 49 more rows
Run_GSEA(DEP_result = dep_E_vs_ctrl_pY, comparison = "E_vs_ctrl_diff", return_df = T,
ptmGSEA_site_df = all_pY_sites, PtmSigdb = PtmSigdb, ptmGSEA = T, single_pathway = "PATH-NP_KIT_RECEPTOR_PATHWAY")
## Joining with `by = join_by(HGNC_Symbol, Annotated_Sequence)`
## [1] "PATH-NP_EGFR1_PATHWAY" "PERT-PSP_HGF"
## [3] "PERT-PSP_PAR1_ACTIVATING_PEPTIDE"
## Joining with `by = join_by(SITE_GRP_ID)`
## # A tibble: 6 × 4
## HGNC_Symbol Annotated_Sequence MOD_RSD FC
## <chr> <chr> <chr> <dbl>
## 1 MAPK3 IADPEHDHTGFLTEyVATR Y204-p 0.965
## 2 MAPK3 IADPEHDHTGFLtEyVATR Y204-p 0.965
## 3 EGFR GSHQISLDNPDyQQDFFPK Y1172-p 0.930
## 4 MAPK1 VADPDHDHTGFLTEyVATR Y187-p 0.872
## 5 MAPK1 VADPDHDHTGFLtEyVATR Y187-p 0.872
## 6 PTK2 THAVSVSETDDyAEIIDEEDTYTMPSTR Y397-p 0.868
data_diff_EC_vs_ctrl_pY <- test_diff(pY_5d_se, type="manual", test = "EC_vs_ctrl")
## Tested contrasts: EC_vs_ctrl
## Warning in fdrtool::fdrtool(res$t, plot = FALSE, verbose = FALSE): There may be
## too few input test statistics for reliable FDR calculations!
dep_EC_vs_ctrl_pY <- add_rejections_SH(data_diff_EC_vs_ctrl_pY, alpha = 0.05, lfc = log2(1.2))
GGPlotly_Volcano(dep_EC_vs_ctrl_pY, contrast = "EC_vs_ctrl",
add_names = TRUE,
additional_title = "pY")
Return_DEP_Hits_Plots(data = pY_all_xeno_5d_form, dep_EC_vs_ctrl_pY, comparison = "EC_vs_ctrl_diff")
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## character(0)
## Note: Row-scaling applied for this heatmap
GSEA_EC_vs_ctrl <- Run_GSEA(DEP_result = dep_EC_vs_ctrl_pY, comparison = "EC_vs_ctrl_diff", return_df = T)
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## character(0)
GSEA_EC_vs_ctrl %>% as_tibble() %>% filter(padj < 0.05) %>% arrange(desc(NES))
## # A tibble: 0 × 8
## # ℹ 8 variables: pathway <chr>, pval <dbl>, padj <dbl>, log2err <dbl>,
## # ES <dbl>, NES <dbl>, size <int>, leadingEdge <list>
GSEA_EC_vs_ctrl_PTM <- Run_GSEA(DEP_result = dep_EC_vs_ctrl_pY, comparison = "EC_vs_ctrl_diff", return_df = T,
ptmGSEA_site_df = all_pY_sites, PtmSigdb = PtmSigdb, ptmGSEA = T)
## Joining with `by = join_by(HGNC_Symbol, Annotated_Sequence)`
## character(0)
GSEA_EC_vs_ctrl_PTM %>% as_tibble() %>% filter(padj < 0.05) %>% arrange(desc(NES))
## # A tibble: 0 × 8
## # ℹ 8 variables: pathway <chr>, pval <dbl>, padj <dbl>, log2err <dbl>,
## # ES <dbl>, NES <dbl>, size <int>, leadingEdge <list>
Run_GSEA(DEP_result = dep_EC_vs_ctrl_pY, comparison = "EC_vs_ctrl_diff", return_df = T,
ptmGSEA_site_df = all_pY_sites, PtmSigdb = PtmSigdb, ptmGSEA = T, single_pathway = "PATH-NP_EGFR1_PATHWAY")
## Joining with `by = join_by(HGNC_Symbol, Annotated_Sequence)`
## character(0)
## Joining with `by = join_by(SITE_GRP_ID)`
## # A tibble: 59 × 4
## HGNC_Symbol Annotated_Sequence MOD_RSD FC
## <chr> <chr> <chr> <dbl>
## 1 PXN VGEEEHVySFPNK Y118-p 0.617
## 2 LYN EKAEERPTFDYLQSVLDDFYTATEGQyQQQP Y508-p 0.387
## 3 LYN AEERPTFDYLQSVLDDFYTATEGQyQQQP Y508-p 0.387
## 4 PKP3 ADyDTLSLR Y176-p 0.361
## 5 CTNND1 HYEDGYPGGSDNyGSLSR Y228-p 0.345
## 6 PKP4 STTNyVDFYSTK Y1168-p 0.287
## 7 ESYT1 HLSPyATLTVGDSSHK Y822-p 0.268
## 8 EGFR GSHQISLDNPDyQQDFFPK Y1172-p 0.260
## 9 PTPRA VVQEYIDAFSDyANFK Y798-p 0.215
## 10 INPPL1 NSFNNPAyYVLEGVPHQLLPPEPPSPAR Y986-p 0.191
## # ℹ 49 more rows
Run_GSEA(DEP_result = dep_EC_vs_ctrl_pY, comparison = "EC_vs_ctrl_diff", return_df = T,
ptmGSEA_site_df = all_pY_sites, PtmSigdb = PtmSigdb, ptmGSEA = T, single_pathway = "PATH-NP_KIT_RECEPTOR_PATHWAY")
## Joining with `by = join_by(HGNC_Symbol, Annotated_Sequence)`
## character(0)
## Joining with `by = join_by(SITE_GRP_ID)`
## # A tibble: 6 × 4
## HGNC_Symbol Annotated_Sequence MOD_RSD FC
## <chr> <chr> <chr> <dbl>
## 1 PTK2 THAVSVSETDDyAEIIDEEDTYTMPSTR Y397-p 0.384
## 2 EGFR GSHQISLDNPDyQQDFFPK Y1172-p 0.260
## 3 MAPK1 VADPDHDHTGFLTEyVATR Y187-p 0.0162
## 4 MAPK1 VADPDHDHTGFLtEyVATR Y187-p 0.0162
## 5 MAPK3 IADPEHDHTGFLTEyVATR Y204-p -0.233
## 6 MAPK3 IADPEHDHTGFLtEyVATR Y204-p -0.233
data_diff_EBC_vs_ctrl_pY <- test_diff(pY_5d_se, type="manual", test = "EBC_vs_ctrl")
## Tested contrasts: EBC_vs_ctrl
## Warning in fdrtool::fdrtool(res$t, plot = FALSE, verbose = FALSE): There may be
## too few input test statistics for reliable FDR calculations!
dep_EBC_vs_ctrl_pY <- add_rejections_SH(data_diff_EBC_vs_ctrl_pY, alpha = 0.05, lfc = log2(1.2))
GGPlotly_Volcano(dep_EBC_vs_ctrl_pY, contrast = "EBC_vs_ctrl",
add_names = TRUE,
additional_title = "pY")
Return_DEP_Hits_Plots(data = pY_all_xeno_5d_form, dep_EBC_vs_ctrl_pY, comparison = "EBC_vs_ctrl_diff")
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## character(0)
## Note: Row-scaling applied for this heatmap
data_diff_EC_vs_E_pY <- test_diff(pY_5d_se, type = "manual",
test = c("EC_vs_E"))
## Tested contrasts: EC_vs_E
## Warning in fdrtool::fdrtool(res$t, plot = FALSE, verbose = FALSE): There may be
## too few input test statistics for reliable FDR calculations!
dep_EC_vs_E_pY <- add_rejections_SH(data_diff_EC_vs_E_pY, alpha = 0.05, lfc = log2(1.2))
GGPlotly_Volcano(dep_EC_vs_E_pY, contrast = "EC_vs_E", add_names = TRUE, additional_title = "pY", proteins_of_interest = "EGFR")
Return_DEP_Hits_Plots(data = pY_all_xeno_5d_form, dep_EC_vs_E_pY, comparison = "EC_vs_E_diff")
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : There were 1 pathways for which P-values were not calculated
## properly due to unbalanced (positive and negative) gene-level statistic values.
## For such pathways pval, padj, NES, log2err are set to NA. You can try to
## increase the value of the argument nPermSimple (for example set it nPermSimple
## = 10000)
## [1] "Golgi Cisternae Pericentriolar Stack Reorganization"
## [2] "Signal Transduction"
## [3] "Cytokine Signaling in Immune system"
## [4] "RHO GTPases Activate NADPH Oxidases"
## [5] "Insulin receptor signalling cascade"
## [6] "PIP3 activates AKT signaling"
## Note: Row-scaling applied for this heatmap
#data_results <- get_df_long(dep)
data_diff_EBC_vs_EC_pY <- test_diff(pY_5d_se, type = "manual",
test = c("EBC_vs_EC"))
## Tested contrasts: EBC_vs_EC
## Warning in fdrtool::fdrtool(res$t, plot = FALSE, verbose = FALSE): There may be
## too few input test statistics for reliable FDR calculations!
dep_EBC_vs_EC_pY <- add_rejections_SH(data_diff_EBC_vs_EC_pY, alpha = 0.05, lfc = log2(1.2))
GGPlotly_Volcano(dep_EBC_vs_EC_pY, contrast = "EBC_vs_EC", add_names = TRUE, additional_title = "pY")
Return_DEP_Hits_Plots(data = pY_all_xeno_5d_form, dep_EBC_vs_EC_pY, comparison = "EBC_vs_EC_diff")
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## character(0)
#data_results <- get_df_long(dep)
sessionInfo()
## R version 4.2.3 (2023-03-15)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur ... 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] lubridate_1.9.2 forcats_1.0.0
## [3] stringr_1.5.0 dplyr_1.1.2
## [5] purrr_1.0.2 readr_2.1.4
## [7] tidyr_1.3.0 tibble_3.2.1
## [9] ggplot2_3.4.2 tidyverse_2.0.0
## [11] mdatools_0.14.0 SummarizedExperiment_1.28.0
## [13] GenomicRanges_1.50.2 GenomeInfoDb_1.34.9
## [15] MatrixGenerics_1.10.0 matrixStats_1.0.0
## [17] DEP_1.20.0 org.Hs.eg.db_3.16.0
## [19] AnnotationDbi_1.60.2 IRanges_2.32.0
## [21] S4Vectors_0.36.2 Biobase_2.58.0
## [23] BiocGenerics_0.44.0 fgsea_1.24.0
##
## loaded via a namespace (and not attached):
## [1] utf8_1.2.3 shinydashboard_0.7.2 proto_1.0.0
## [4] gmm_1.8 tidyselect_1.2.0 RSQLite_2.3.1
## [7] htmlwidgets_1.6.2 grid_4.2.3 BiocParallel_1.32.6
## [10] norm_1.0-11.1 munsell_0.5.0 codetools_0.2-19
## [13] preprocessCore_1.60.2 chron_2.3-61 DT_0.28
## [16] withr_2.5.0 colorspace_2.1-0 highr_0.10
## [19] knitr_1.43 rstudioapi_0.15.0 ggsignif_0.6.4
## [22] mzID_1.36.0 labeling_0.4.2 GenomeInfoDbData_1.2.9
## [25] bit64_4.0.5 farver_2.1.1 pheatmap_1.0.12
## [28] vctrs_0.6.3 generics_0.1.3 xfun_0.40
## [31] timechange_0.2.0 R6_2.5.1 doParallel_1.0.17
## [34] clue_0.3-64 MsCoreUtils_1.10.0 bitops_1.0-7
## [37] cachem_1.0.8 DelayedArray_0.24.0 assertthat_0.2.1
## [40] promises_1.2.1 scales_1.2.1 vroom_1.6.3
## [43] gtable_0.3.3 affy_1.76.0 sandwich_3.0-2
## [46] rlang_1.1.1 mzR_2.32.0 GlobalOptions_0.1.2
## [49] rstatix_0.7.2 lazyeval_0.2.2 impute_1.72.3
## [52] broom_1.0.5 BiocManager_1.30.22 yaml_2.3.7
## [55] abind_1.4-5 crosstalk_1.2.0 backports_1.4.1
## [58] httpuv_1.6.11 tools_4.2.3 affyio_1.68.0
## [61] ellipsis_0.3.2 gplots_3.1.3 jquerylib_0.1.4
## [64] RColorBrewer_1.1-3 STRINGdb_2.10.1 MSnbase_2.24.2
## [67] gsubfn_0.7 Rcpp_1.0.11 hash_2.2.6.2
## [70] plyr_1.8.8 zlibbioc_1.44.0 RCurl_1.98-1.12
## [73] ggpubr_0.6.0 sqldf_0.4-11 GetoptLong_1.0.5
## [76] cowplot_1.1.1 zoo_1.8-12 cluster_2.1.4
## [79] magrittr_2.0.3 data.table_1.14.8 circlize_0.4.15
## [82] reactome.db_1.82.0 pcaMethods_1.90.0 mvtnorm_1.2-2
## [85] ProtGenerics_1.30.0 hms_1.1.3 mime_0.12
## [88] evaluate_0.21 xtable_1.8-4 XML_3.99-0.14
## [91] readxl_1.4.3 shape_1.4.6 compiler_4.2.3
## [94] KernSmooth_2.23-22 ncdf4_1.21 crayon_1.5.2
## [97] htmltools_0.5.6 later_1.3.1 tzdb_0.4.0
## [100] DBI_1.1.3 ComplexHeatmap_2.14.0 MASS_7.3-60
## [103] tmvtnorm_1.5 Matrix_1.6-0 car_3.1-2
## [106] cli_3.6.1 vsn_3.66.0 imputeLCMD_2.1
## [109] parallel_4.2.3 igraph_1.5.1 pkgconfig_2.0.3
## [112] plotly_4.10.2 MALDIquant_1.22.1 foreach_1.5.2
## [115] bslib_0.5.0 XVector_0.38.0 digest_0.6.33
## [118] Biostrings_2.66.0 rmarkdown_2.23 cellranger_1.1.0
## [121] fastmatch_1.1-3 shiny_1.7.4.1 gtools_3.9.4
## [124] rjson_0.2.21 lifecycle_1.0.3 jsonlite_1.8.7
## [127] carData_3.0-5 viridisLite_0.4.2 limma_3.54.2
## [130] fansi_1.0.4 pillar_1.9.0 lattice_0.21-8
## [133] KEGGREST_1.38.0 fastmap_1.1.1 httr_1.4.6
## [136] plotrix_3.8-2 glue_1.6.2 fdrtool_1.2.17
## [139] png_0.1-8 iterators_1.0.14 bit_4.0.5
## [142] stringi_1.7.12 sass_0.4.7 blob_1.2.4
## [145] caTools_1.18.2 memoise_2.0.1
knitr::knit_exit()